###Appendix replication of Figures 5, 6 , 7 and 8--with weights

library(plyr)
library(dplyr)
library(MASS)
library(stargazer)

setwd("C:/Users/kevin/Dropbox/Social_Media_Lab/Data_Survey/YouGov_Data/apsr_replication/")


load("combined_w_raked_weights.RData")


data<-uk

## Recode the Twitter frequency use variable to better reflect the underlying categories
## Roughly, how many times per month do you go on Twitter?
data$twitter_freq_inc<-as.numeric(
  revalue(as.character(data$twitter_freq_inc) , 
          c("7"= "90", "6"="30", "5" = " 15", "4" = "7", "3"="2", 
            "2"="1", "1"="0")))

###need the all_tweets variable un-logged, and to avoid dividing by zero

data$tweets_all_topics<-(1.0001 +data$tweets_all_topics)

data$all_tweets_log<-1/log(data$tweets_all_topics)

#########################################################################
##Loop over all models
#########################################################################



controls <- c("woman", "age", "lowerclass", "profile_education_age", 
              "white_british", "married", "newsnight_freq", "religious",
              "internet_freq_inc",  "newspaper_type" )




full_parties <- c("Labour", "UKIP", "LibDem", "Tories")

parties <- c("labour", "UKIP", "libdem", "conserv")
issues <- c("EU", "spending", "immigration")
waves <- c("w1", "w2", "w3", "w4")
t_parties <- c("labo", "ukip", "lide", "tory")

##need to change t_issues to accomodate isis

qs <- c("isis","unemployment",  "immigrants")
t_issues <- c("isis", "economy", "immigr")
t_waves <- c("p1", "p2", "p3", "p4")
pid <- c(2, 5, 3, 1)
tt_issues <- c("isis", "econ", "imm")





##
names<-list()

models_combo=list()
models_lm=list()
models_full=list()
models_full_raw=list()


labo_means=list()
labo_ses=list()
ukip_means=list()
ukip_ses=list()
lide_means=list()
lide_ses=list()
tory_means=list()
tory_ses=list()
right_media_means=list()
right_media_ses=list()
cent_media_means=list()
cent_media_ses=list()
left_media_means=list()
left_media_ses=list()




## Here the outcome is whether or not they were correct in wave 3,
## the baseline is whether or not they were correct in wave 2
## We go from Wave 2 to Wave 3 because these are the only waves we asked these questions

for (j in 1:length(qs)){
  
  data$outcome<-abs(eval(parse(text=paste0("data$fact_" , qs[j], 
                                           "_w3_correct_dk0" ))))
  
  data$baseline<-abs(eval(parse(text=paste0("data$fact_" , qs[j], 
                                            "_w2_correct_dk0"))))
  
  ##Look at the relevant tweets from each party in the 3rd time period
  data$tweet_count_labo<-  (
    eval(parse(text=paste0("data$p3_1", "_", t_issues[j], "_labo"))) +
      eval(parse(text=paste0("data$p3_2", "_", t_issues[j], "_labo")))
  )
  
  data$tweet_count_lide<-  (
    eval(parse(text=paste0("data$p3_1", "_", t_issues[j], "_lide"))) +
      eval(parse(text=paste0("data$p3_2", "_", t_issues[j], "_lide")))
  )
  
  data$tweet_count_ukip<-  (
    eval(parse(text=paste0("data$p3_1", "_", t_issues[j], "_ukip"))) +
      eval(parse(text=paste0("data$p3_2", "_", t_issues[j], "_ukip")))
  )
  
  data$tweet_count_tory<-  (
    eval(parse(text=paste0("data$p3_1", "_", t_issues[j], "_tory"))) +
      eval(parse(text=paste0("data$p3_2", "_", t_issues[j], "_tory")))
  )
  
  
  
  
  data$right_media<-{ 
    + eval(parse(text=paste0("data$p3_media_right_", t_issues[j])))
  }
  
  data$right_media_log<-log(1.00001 + data$right_media)
  
  data$left_media<-{ 
    + eval(parse(text=paste0("data$p3_media_left_", t_issues[j])))
  }
  
  data$left_media_log<-log(1.00001 + data$left_media)
  
  
  data$cent_media<-{ 
    + eval(parse(text=paste0("data$p3_media_cent_", t_issues[j])))
  }
  
  data$cent_media_log<-log(1.00001 + data$cent_media)
  
  
  
  ##create aggregate variables 
  data$parties<-log(1.0001+ 
                      (data$tweet_count_lide) + (data$tweet_count_labo) +
                      (data$tweet_count_ukip) + (data$tweet_count_tory))
  
  data$media<-log(1.0001+data$cent_media + data$left_media + data$right_media)
  
  data$total<-log(1.0001+ 
                    (data$tweet_count_lide) + (data$tweet_count_labo) +
                    (data$tweet_count_ukip) + (data$tweet_count_tory) +
                    data$cent_media + data$left_media + data$right_media)
  
  
  
  ##log the main variables 
  
  data$tweet_count_tory<-log(1.00001 + data$tweet_count_tory)
  
  data$tweet_count_labo<-log(1.00001 + data$tweet_count_labo)
  
  data$tweet_count_lide<-log(1.00001 + data$tweet_count_lide)
  
  data$tweet_count_ukip<-log(1.00001 + data$tweet_count_ukip)
  
  
  ##various model specifications
  
  #full
  models_full[[j]]<-summary(glm(paste0("outcome ~ baseline + total + twitter_freq_inc  +"
                                          , paste0(controls, collapse="+")), data=data, na.action=na.exclude, family="binomial",
                                weights = raked_weights ))
  
  models_full_raw[[j]]<-(glm(paste0("outcome ~ baseline + total + twitter_freq_inc  +"
                                       , paste0(controls, collapse="+")), data=data, na.action=na.exclude, family="binomial", weights = raked_weights))
  
  #combo
  models_combo[[j]]<-summary(glm(paste0("outcome ~ baseline + media + parties + twitter_freq_inc  +"
                                       , paste0(controls, collapse="+")), data=data, na.action=na.exclude, family="binomial", weights = raked_weights))
  
  #no interactions
  
  models_lm[[j]]<-summary(glm(paste0("outcome ~ baseline + tweet_count_labo + tweet_count_ukip + tweet_count_lide + tweet_count_tory +  
                                    right_media_log + left_media_log + cent_media_log+ twitter_freq_inc  + "
                                    , paste0(controls, collapse="+")), data=data, na.action=na.exclude, family="binomial", weights = raked_weights))
  
}

################graphing--all tweets

full_qs<-c("ISIS", "Unemployment", "Immigration")


  names<-c("ISIS", "Unemployment", "Immigration")
  index<-c(1, 2, 3)
  ##extract mean values 
  means<-c(models_full[[1]]$coefficients["total","Estimate"],
           models_full[[2]]$coefficients["total","Estimate"],
           models_full[[3]]$coefficients["total","Estimate"]
           
           
  )
  
  ##extract standard errors values 
  ses<-c(models_full[[1]]$coefficients["total","Std. Error"],
         models_full[[2]]$coefficients["total","Std. Error"],
         models_full[[3]]$coefficients["total","Std. Error"]
  )
  
  
  means<-unlist(means)
  ses<-unlist(ses)
  
  ##plot 
  pdf("results/all_w2w3_fact_correct_total_weighted.pdf", 8,4)
  par(mar=c(4, 8, 4, 2)  )
  plot( means, index, xlim=c(-.6, .6), ylab="", yaxt="n", xlab="Logistic regression coefficients on number of relevant tweets",
     #   main = paste0("Weighted Effects of Tweets on Issue-Specific Accuracy"), pch=3)
     main = "", pch=3)
  
  axis(2, at=index, labels =paste0(names), las=2)
  abline(v=0)
  points(means + 1.64*ses, index, pch="|")
  points(means - 1.64*ses, index, pch="|")
  for(i in 1:length(means)){
    lines(c(means[i] - 1.96*ses[i],means[i] + 1.96*ses[i]), c(index[i],index[i]))
  }
  
  dev.off()
  
  


################graphing--combined results

full_qs<-c("ISIS", "Unemployment", "Immigration")

all_means<-vector()
all_ses<-vector()


for(i in 1:length(models_combo)){
  names<-c("Parties", "Media")
  index<-c(.1, .2)
  ##extract mean values 
  means<-c(models_combo[[i]]$coefficients["parties","Estimate"],
           models_combo[[i]]$coefficients["media","Estimate"])
  
  ##extract standard errors values 
  ses<-c(models_combo[[i]]$coefficients["parties","Std. Error"],
         models_combo[[i]]$coefficients["media","Std. Error"])  
  
  
  all_means<-c(all_means,unlist(means))
  all_ses<-c(all_ses,unlist(ses))
  
 
  
}





####################################

#Combine into one graph


##plot 
pdf(paste0("results/", "all_w2w3_fact_correct_combo_weighted.pdf"), 12,4)
par(mar=c(4, 10, 4, 2)  )
index<-seq(.1, .6, length.out = 6)
plot( all_means, index, xlim=c(-.6, .6), ylim=c(.09, .61), ylab="", yaxt="n", xlab="Logistic regression coefficients on number of relevant tweets",
    #  main = paste0("Weighted Effects of Tweets on Issue-Specific Accuracy"), pch=3)
    main = "", pch=3)


names<- c("ISIS: Parties", "ISIS: Media", "Unemployment: Parties","Unemployment: Media","Immigration: Parties",
          "Immigration: Media"          )
axis(2, at=index, labels =paste0(names), las=2)
abline(v=0)
points(all_means + 1.64*all_ses, index, pch="|")
points(all_means - 1.64*all_ses, index, pch="|")
for(i in 1:length(all_means)){
  lines(c(all_means[i] - 1.96*all_ses[i],all_means[i] + 1.96*all_ses[i]), c(index[i],index[i]))
}

dev.off()




##########################graphing--divided results 

full_qs<-c("ISIS", "Unemployment", "Immigration")

for(i in 1:length(full_qs)){
  
  names<-c(full_parties, "Right media",  "Cent media","Left media")
  index<-seq(7)
  ##extract mean values 
  means<-c(models_lm[[i]]$coefficients["tweet_count_labo","Estimate"],
           models_lm[[i]]$coefficients["tweet_count_ukip","Estimate"],
           models_lm[[i]]$coefficients["tweet_count_lide","Estimate"],
           models_lm[[i]]$coefficients["tweet_count_tory","Estimate"],
           models_lm[[i]]$coefficients["right_media_log","Estimate"],
           models_lm[[i]]$coefficients["cent_media_log","Estimate"],
           models_lm[[i]]$coefficients["left_media_log","Estimate"]
  )
  
  ##extract standard errors values 
  ses<-c(models_lm[[i]]$coefficients["tweet_count_labo","Std. Error"],
         models_lm[[i]]$coefficients["tweet_count_ukip","Std. Error"],
         models_lm[[i]]$coefficients["tweet_count_lide","Std. Error"],
         models_lm[[i]]$coefficients["tweet_count_tory","Std. Error"],
         models_lm[[i]]$coefficients["right_media_log","Std. Error"],
         models_lm[[i]]$coefficients["cent_media_log","Std. Error"],
         models_lm[[i]]$coefficients["left_media_log","Std. Error"]
  )  
  
  
  
  means<-unlist(means)
  ses<-unlist(ses)
  
  ##plot 
  pdf(paste0("results/", full_qs[i],"_w2w3_fact_correct_weighted.pdf"), 12,8)
  par(mar=c(4, 8, 4, 2)  )
  plot( means, index,   xlim=c(-.6, .6), ylab="", yaxt="n", xlab="Logistic regression coefficients on number of relevant tweets",
    #    main = paste0("Weighted Effects of Tweets on Issue-Specific Accuracy: ", full_qs[i]), pch=3)
    main = "", pch=3)
  
    axis(2, at=index, labels =paste0(names), las=2)
  abline(v=0)
  points(means + 1.64*ses, index, pch="|")
  points(means - 1.64*ses, index, pch="|")
  for(i in 1:length(means)){
    lines(c(means[i] - 1.96*ses[i],means[i] + 1.96*ses[i]), c(index[i],index[i]))
  }
  
  dev.off()
  
  
}
